In [10]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#%matplotlib inline
In [11]:
#taking a dataset asylum_seekers_monthly.csv
In [12]:
ASM=pd.read_csv('C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv')
ASM
C:\Users\Dell\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3063: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[12]:
Country / territory of asylum/residence Origin Year Month Value
0 Australia Afghanistan 1999 January 8
1 Australia Afghanistan 1999 February 10
2 Australia Afghanistan 1999 March 25
3 Australia Afghanistan 1999 April 25
4 Australia Afghanistan 1999 May 7
... ... ... ... ... ...
332184 USA (INS/DHS) Zimbabwe 2016 December 28
332185 USA (INS/DHS) Zimbabwe 2017 February 27
332186 USA (INS/DHS) Zimbabwe 2017 March 42
332187 USA (INS/DHS) Zimbabwe 2017 April 16
332188 USA (INS/DHS) Zimbabwe 2017 May 12

332189 rows × 5 columns

In [13]:
ASM.head()
Out[13]:
Country / territory of asylum/residence Origin Year Month Value
0 Australia Afghanistan 1999 January 8
1 Australia Afghanistan 1999 February 10
2 Australia Afghanistan 1999 March 25
3 Australia Afghanistan 1999 April 25
4 Australia Afghanistan 1999 May 7
In [15]:
df = pd.DataFrame(ASM)
df['Value'] = pd.to_numeric(df['Value'],errors='coerce')
df = df.replace(np.nan, 0, regex=True)
df['Value'] = df['Value'].astype(int)

print (df)
print (df.dtypes)
       Country / territory of asylum/residence       Origin  Year     Month  \
0                                    Australia  Afghanistan  1999   January   
1                                    Australia  Afghanistan  1999  February   
2                                    Australia  Afghanistan  1999     March   
3                                    Australia  Afghanistan  1999     April   
4                                    Australia  Afghanistan  1999       May   
...                                        ...          ...   ...       ...   
332184                           USA (INS/DHS)     Zimbabwe  2016  December   
332185                           USA (INS/DHS)     Zimbabwe  2017  February   
332186                           USA (INS/DHS)     Zimbabwe  2017     March   
332187                           USA (INS/DHS)     Zimbabwe  2017     April   
332188                           USA (INS/DHS)     Zimbabwe  2017       May   

        Value  
0           8  
1          10  
2          25  
3          25  
4           7  
...       ...  
332184     28  
332185     27  
332186     42  
332187     16  
332188     12  

[332189 rows x 5 columns]
Country / territory of asylum/residence    object
Origin                                     object
Year                                        int64
Month                                      object
Value                                       int32
dtype: object
In [30]:
origin_plot= df.Origin.value_counts().plot(kind="bar",x=data["Origin"],title="Countries from which most people migrate",legend=True)
fig = origin_plot.get_figure()
plt.figure(figsize=(750,300))
Out[30]:
<Figure size 54000x21600 with 0 Axes>
<Figure size 54000x21600 with 0 Axes>
In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
ASM=pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
x=ASM["Origin"]
x=list(dict.fromkeys(x))
y=ASM.Origin.value_counts()
print(y)
y.plot(kind="bar",x=ASM["Origin"],legend="True")
ax.set_title("Countries from which most people migrate",fontsize="20")

fig.set_size_inches(15,10)
plt.savefig("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\graphs\\1.jpg")
C:\Users\Dell\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3063: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Iraq                                6367
Iran (Islamic Rep. of)              6207
Afghanistan                         6122
Pakistan                            6109
Nigeria                             5844
                                    ... 
French Guiana                          5
Liechtenstein                          5
Micronesia (Federated States of)       5
Puerto Rico                            5
New Caledonia                          4
Name: Origin, Length: 205, dtype: int64
In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
ASM=pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
fig=plt.figure()
ax=fig.add_axes([0,0,1,1])
x=ASM["territory_of_asylum"]
x=list(dict.fromkeys(x))
y=ASM.territory_of_asylum.value_counts()
print(y)
y.plot(kind="bar",x=ASM["territory_of_asylum"],legend="True")
ax.set_title("Countries to which most people migrate",fontsize="20")

fig.set_size_inches(15,10)
plt.savefig("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\graphs\\2.jpg")
C:\Users\Dell\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3063: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Canada                                                  25693
USA (INS/DHS)                                           19734
USA (EOIR)                                              18559
Germany                                                 18147
France                                                  17870
United Kingdom of Great Britain and Northern Ireland    17709
Sweden                                                  17035
Belgium                                                 16336
Switzerland                                             15404
Netherlands                                             14548
Norway                                                  11978
Austria                                                 11905
Australia                                               11220
Spain                                                   10477
Denmark                                                  9420
Ireland                                                  9131
Greece                                                   7616
Finland                                                  7555
Italy                                                    7050
Hungary                                                  6597
Czech Rep.                                               5836
Poland                                                   4616
Turkey                                                   4516
Cyprus                                                   4500
Luxembourg                                               4141
Romania                                                  3800
Japan                                                    3528
Bulgaria                                                 3255
Slovakia                                                 3223
New Zealand                                              2995
Rep. of Korea                                            2946
Slovenia                                                 2223
Portugal                                                 2161
Malta                                                    1953
Serbia and Kosovo: S/RES/1244 (1999)                     1585
Croatia                                                  1455
Iceland                                                   989
Lithuania                                                 894
The former Yugoslav Rep. of Macedonia                     747
Montenegro                                                731
Liechtenstein                                             649
Latvia                                                    537
Estonia                                                   412
Bosnia and Herzegovina                                    312
Albania                                                   201
Name: territory_of_asylum, dtype: int64
In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
data=pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
y=data.Origin.value_counts()
val=[]
sta=[]
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
for x in range(0,len(y),1):
    if(y[x]>=30):
        val.append(y[x])
key=y.keys()
for i in range(0,len(val),1):
    sta.append(key[i])
ax.pie(val,labels=sta,autopct='%1.2f%%')
fig.set_size_inches(18.5, 10.5)
plt.savefig("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\graphs\\3.jpg")
C:\Users\Dell\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3063: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
data=pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
y=data.territory_of_asylum.value_counts()
val=[]
sta=[]
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
for x in range(0,len(y),1):
    if(y[x]>=30):
        val.append(y[x])
key=y.keys()
for i in range(0,len(val),1):
    sta.append(key[i])
ax.pie(val,labels=sta,autopct='%1.2f%%')
fig.set_size_inches(18.5, 10.5)
plt.savefig("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\graphs\\4.jpg")
In [45]:
print (ASM.dtypes)
territory_of_asylum    object
Origin                 object
Year                    int64
Month                  object
Value                  object
dtype: object
In [46]:
df = pd.DataFrame(ASM)
df['Value'] = pd.to_numeric(df['Value'],errors='coerce')
df = df.replace(np.nan, 0, regex=True)
df['Value'] = df['Value'].astype(int)

print (df)
print (df.dtypes)
       territory_of_asylum       Origin  Year     Month  Value
0                Australia  Afghanistan  1999   January      8
1                Australia  Afghanistan  1999  February     10
2                Australia  Afghanistan  1999     March     25
3                Australia  Afghanistan  1999     April     25
4                Australia  Afghanistan  1999       May      7
...                    ...          ...   ...       ...    ...
332184       USA (INS/DHS)     Zimbabwe  2016  December     28
332185       USA (INS/DHS)     Zimbabwe  2017  February     27
332186       USA (INS/DHS)     Zimbabwe  2017     March     42
332187       USA (INS/DHS)     Zimbabwe  2017     April     16
332188       USA (INS/DHS)     Zimbabwe  2017       May     12

[332189 rows x 5 columns]
territory_of_asylum    object
Origin                 object
Year                    int64
Month                  object
Value                   int32
dtype: object
In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
ASM=pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
df = pd.DataFrame(ASM)
df['Value'] = pd.to_numeric(df['Value'],errors='coerce')
df = df.replace(np.nan, 0, regex=True)
df['Value'] = df['Value'].astype(int)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
x=df["Year"]
y=df["Value"]
ax.set_xlabel("Year",fontsize="20")
ax.set_ylabel("Value",fontsize="20")
ax.set_title("Immigrants accordind to year between 1999 to 2016",fontsize="20" )
ax.scatter(x,y)
fig.set_size_inches(15,10)
plt.savefig("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\graphs\\5.jpg")
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
df = pd.read_csv("C:\\Users\\Dell\\OneDrive\\Desktop\\PROJECTS\\DV\\asylum_seekers_monthly.csv")
df.isna().sum()
df[df.Value == '*']
df = df[df.Value != '*']
C:\Users\Dell\anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3063: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
Out[2]:
territory_of_asylum    0
Origin                 0
Year                   0
Month                  0
Value                  0
dtype: int64
In [5]:
#since value is in object so converting it into int
#df['Value'].astype(str)
df['Value'] = pd.to_numeric(df['Value'])
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 320348 entries, 0 to 332188
Data columns (total 5 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   territory_of_asylum  320348 non-null  object
 1   Origin               320348 non-null  object
 2   Year                 320348 non-null  int64 
 3   Month                320348 non-null  object
 4   Value                320348 non-null  int64 
dtypes: int64(2), object(3)
memory usage: 14.7+ MB
In [7]:
# Heatmap visualization
In [9]:
grouped_df = df.groupby(['territory_of_asylum','Origin'])['Year'].count().reset_index()
grouped_df = grouped_df.pivot('territory_of_asylum', 'Origin', 'Year')
grouped_df.head()
plt.figure(figsize=(20,20))
sns.heatmap(grouped_df,cmap="YlGnBu")
plt.title("Heatmap showing territory_of_asylum vs Origin in yearwise")
plt.show()
In [10]:
pd.crosstab(df["territory_of_asylum"]=="Australia",df["Origin"]=='Afghanistan')
Out[10]:
Origin False True
territory_of_asylum
False 303344 5784
True 11010 210
In [11]:
## since the above is not much clear. so lets do by each country .i.e seeing the heatmap of each territory_of_asylum vs all Origin
In [13]:
grouped_df = df.groupby(['territory_of_asylum','Origin'])['Year'].count().reset_index()
grouped_df.head()
def country(x):
    grouped_df1 =grouped_df[grouped_df['territory_of_asylum']==x]
    grouped_df1 = grouped_df1.pivot(index='territory_of_asylum',columns= 'Origin',values= 'Year')
    return grouped_df1
tmo=df['territory_of_asylum'].value_counts()
tmo.index
for con in tmo.index:
    grouped_df1 = country(con)
    plt.figure(figsize=(20,5))
    sns.heatmap(grouped_df1,cmap="YlGnBu")
    plt.title(f"Heatmap showing {con} vs Origin in yearwise")
    plt.show()
In [14]:
## now lets do the same but with respect to total refugee count
In [15]:
grouped_df = df.groupby(['territory_of_asylum','Origin'])['Value'].sum().reset_index()
grouped_df.head()
def country(x):
    grouped_df1 =grouped_df[grouped_df['territory_of_asylum']==x]
    grouped_df1 = grouped_df1.pivot(index='territory_of_asylum',columns= 'Origin',values= 'Value')
    return grouped_df1
grouped_df1 =grouped_df[grouped_df['territory_of_asylum']=='Afghanistan']
grouped_df1 = grouped_df1.pivot(index='territory_of_asylum',columns= 'Origin',values= 'Value')
for con in tmo.index:
    grouped_df1 = country(con)
    plt.figure(figsize=(20,5))
    sns.heatmap(grouped_df1,cmap="YlGnBu")
    plt.title(f"Heatmap showing {con} vs Origin in yearwise")
    plt.show()
    
In [ ]: